home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Mac Mania 6
/
MacMania 6.toast
/
/
Tools&Utilities
/
EnterAct Stuff
/
hAWK project
/
AWK Source
/
hAWK_Sort.c
< prev
next >
Wrap
Text File
|
1994-12-22
|
9KB
|
331 lines
/* hAWK_Sort.c : builtin sort capability */
/* Copyright © 1986, 1988, 1989 1991 the Free Software Foundation, Inc.
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language, modified for the Macintosh (also called hAWK).
* GAWK is free software; you can redistribute or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 1, or any later version.
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with GAWK; see the file "COPYING hAWK". If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* Written for THINK C 4 on the Macintosh by Ken Earle (Dynabyte) Aug 1991.
*/
#include "AWK.H"
enum {ASCII_SORT, RASCII_SORT, DICT_SORT, RDICT_SORT,
NUM_SORT, RNUM_SORT, roentiyehs = 999};
#define NUMSORTS 6
/* from ARRAY.C */
extern void assoc_clear(NODE *symbol);
extern struct search *assoc_scan_sort(NODE *symbol);
extern struct search *assoc_next_sort(struct search *lookat);
/* hAWK_Sort.c functions */
short b_get_three(NODE *tree, NODE **res1, NODE **res2, NODE **res3);
NODE *do_sort(NODE *tree);
static long create_index_array(NODE *arr, NODE *ind, short sort_type);
void InitCompFuncArray(void);
void SortArray(long numIndexes, short sort_type);
void qs(long lb, long ub);
long Rearrange(long lb, long ub);
void SwapSorters(long i, long j);
/* comparison functions */
short ASCIIComp(long i, long j);
short RASCIIComp(long i, long j);
short DictComp(long i, long j);
short RDictComp(long i, long j);
short NumComp(long i, long j);
short RNumComp(long i, long j);
short PtrLenPtrLenCompare(Ptr spatPtr, short patLen, Ptr stargetPtr, short targetLen);
short b_get_three(NODE *tree, NODE **res1, NODE **res2, NODE **res3)
{
if (!tree) {
return 0;
}
*res1 = tree->lnode;
if (!tree->rnode)
return 1;
tree = tree->rnode;
*res2 = tree->lnode;
if (!tree->rnode)
return 2;
tree = tree->rnode;
*res3 = tree_eval(tree->lnode);
return 3;
}
NODE *do_sort(NODE *tree)
{
NODE *t1, *t2, *t3; /* array, array, string */
char *order_c;
char sort_char,
*s;
NODE *arr, *ind;
short num_args, sort_type, in_reverse = 0;
if ((num_args = b_get_three(tree, &t1, &t2, &t3)) < 3)
{
if (num_args == 2)
sort_type = ASCII_SORT;
else
fatal("sort requires at least two arguments");
}
else
{
order_c = force_string(t3)->stptr;
if (*order_c == 'r' || *order_c == 'R')
{
in_reverse = 1;
sort_char = *(order_c + 1);
}
else
sort_char = *order_c;
switch (sort_char)
{
case 'n':
case 'N':
sort_type = NUM_SORT;
break;
case 'd':
case 'D':
sort_type = DICT_SORT;
break;
case 'a':
case 'A':
default:
sort_type = ASCII_SORT;
break;
}
if (in_reverse)
++sort_type;
free_temp(t3);
}
arr = t1;
if (t1->type == Node_param_list)
arr = stack_ptr[t1->param_cnt];
if (arr->type != Node_var && arr->type != Node_var_array)
fatal("first argument of sort is not a variable");
ind = t2;
if (t2->type == Node_param_list)
ind = stack_ptr[t2->param_cnt];
if (ind->type != Node_var && ind->type != Node_var_array)
fatal("second argument of sort is not a variable");
assoc_clear(ind);
return tmp_number((AWKNUM)
create_index_array(arr, ind, sort_type));
}
static NODE **stat_sorter;
/* loop over arr, copying index and element pointers; sort this
temp array on values of elements, then use this order to create
numerically-indexed ind array via
*assoc_lookup(ind, tmp_number((AWKNUM) (number)))
= make_string(ptr to index string, len of string);
1 hiccup results of assoc_scan of arr[] into temporary
handled array of NODE pointers
2 sort array of NODE pointers by element strings
3 walk sorted NODE pointers and create the array ind[] with
indexes 1: up and values equal to arr[] indexes
4 return number of elements in ind[]
Prelim version: walk array to precalculate size of sorter, then walk
it again and fill in the buckets. Not a horrible waste of time.
*/
static long create_index_array(NODE *arr, NODE *ind, short sort_type)
{
struct search *l;
long j, k, numIndexes = 0;
for (l = assoc_scan_sort(arr); l; l = assoc_next_sort((struct search *)l))
{
++numIndexes;
}
if (numIndexes == 0)
return(0);
emalloc(stat_sorter, NODE **, (sizeof(NODE *) *
numIndexes), "create_index_array");
for (j = 0, l = assoc_scan_sort(arr); l; l = assoc_next_sort((struct search *)l), ++j)
{
if (sort_type >= NUM_SORT)
force_number(l->retval->ahvalue);
else
force_string(l->retval->ahvalue);
*(stat_sorter + j) = l->retval;
}
SortArray(numIndexes, sort_type);
for (j = 0, k = 1; j < numIndexes; ++j, ++k)
{
*assoc_lookup(ind, tmp_number((AWKNUM) (k)))
= make_string((*(stat_sorter+j))->ahname->stptr,
(*(stat_sorter+j))->ahname->stlen);
}
free(stat_sorter);
return(numIndexes);
}
typedef short (*CompareFunction)(long lb, long ub);
static CompareFunction compFuncArray[NUMSORTS];
static CompareFunction compFunc;
void InitCompFuncArray()
{
compFuncArray[0] = ASCIIComp;
compFuncArray[1] = RASCIIComp;
compFuncArray[2] = DictComp;
compFuncArray[3] = RDictComp;
compFuncArray[4] = NumComp;
compFuncArray[5] = RNumComp;
}
void SortArray(long numIndexes, short sort_type)
{
if (!(compFuncArray[0]))
InitCompFuncArray();
compFunc = compFuncArray[sort_type];
if (numIndexes)
qs(0L, numIndexes - 1L);
}
void qs(long lb, long ub)
{
long j;
if (lb < ub)
{
if (j = Rearrange(lb, ub))
qs(lb, j - 1L);
qs(j + 1L, ub);
}
}
long Rearrange(long lb, long ub)
{
do
{
while (ub > lb && compFunc(ub, lb) >= 0)
ub--;
if (ub != lb)
{
SwapSorters(ub, lb);
while (lb < ub && compFunc(lb, ub) <= 0)
lb++;
if (lb != ub)
SwapSorters(lb, ub);
}
} while (lb != ub);
return(lb);
}
void SwapSorters(long i, long j)
{
NODE *temp = *(stat_sorter + i);
*(stat_sorter + i) = *(stat_sorter + j);
*(stat_sorter + j) = temp;
}
/* Sort compare functions - ASCII, dictionary, numeric */
short ASCIIComp(long i, long j)
{
return(strcmp((*(stat_sorter + i))->ahvalue->stptr,
(*(stat_sorter + j))->ahvalue->stptr));
}
short RASCIIComp(long i, long j)
{
return(strcmp((*(stat_sorter + j))->ahvalue->stptr,
(*(stat_sorter + i))->ahvalue->stptr));
}
short DictComp(long i, long j)
{
return(PtrLenPtrLenCompare((*(stat_sorter + i))->ahvalue->stptr,
(*(stat_sorter + i))->ahvalue->stlen,
(*(stat_sorter + j))->ahvalue->stptr,
(*(stat_sorter + j))->ahvalue->stlen));
}
short RDictComp(long i, long j)
{
return(PtrLenPtrLenCompare((*(stat_sorter + j))->ahvalue->stptr,
(*(stat_sorter + j))->ahvalue->stlen,
(*(stat_sorter + i))->ahvalue->stptr,
(*(stat_sorter + i))->ahvalue->stlen));
}
short NumComp(long i, long j)
{
if ((*(stat_sorter + i))->ahvalue->numbr <
(*(stat_sorter + j))->ahvalue->numbr)
return(-1);
else if ((*(stat_sorter + i))->ahvalue->numbr ==
(*(stat_sorter + j))->ahvalue->numbr)
return(0);
else
return(1);
}
short RNumComp(long i, long j)
{
if ((*(stat_sorter + i))->ahvalue->numbr <
(*(stat_sorter + j))->ahvalue->numbr)
return(1);
else if ((*(stat_sorter + i))->ahvalue->numbr ==
(*(stat_sorter + j))->ahvalue->numbr)
return(0);
else
return(-1);
}
typedef Byte *BPtr;
short PtrLenPtrLenCompare(Ptr spatPtr, short patLen, Ptr stargetPtr, short targetLen)
{
BPtr patPtr = (BPtr)spatPtr,
patEndPtr = patPtr + patLen,
curPtr = (BPtr)stargetPtr,
curEndPtr = curPtr + targetLen;
short i, j;
while (patPtr < patEndPtr && curPtr < curEndPtr && *patPtr == *curPtr)
{
++patPtr;
++curPtr;
}
if (patPtr == patEndPtr && curPtr == curEndPtr) /* exact match */
return(0);
if (patPtr == patEndPtr && curPtr != curEndPtr) /* pattern too short */
return(-1);
if (patPtr != patEndPtr && curPtr == curEndPtr) /* starget too short */
return(1);
/* true miscompare within string */
i = (short)*patPtr;
j = (short)*curPtr;
/* sequence: nonalpha, AaBbCc...Zz */
/* 'A' = 65, 'a' = 97; move 'U' to 'U'*2 + 256, 'u' to 'u'*2 + 193 */
/* 'A' -> 386, 'B' -> 388, 'a' -> 387, 'b' -> 389 etc */
if ('A' <= i && i <= 'Z')
i = i*2 + 256;
else if ('a' <= i && i <= 'z')
i = i*2 + 193;
if ('A' <= j && j <= 'Z')
j = j*2 + 256;
else if ('a' <= j && j <= 'z')
j = j*2 + 193;
return(i - j);
}